library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(nparLD)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:rstatix':
##
## select
##
## The following object is masked from 'package:dplyr':
##
## select
library(PMCMRplus)
library(WRS2)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np
from scipy.stats import pearsonr, spearmanr
import os
#import decoupler as dc
import gseapy as gp
from gseapy.parser import gsea_gmt_parser
from scipy.stats import wilcoxon, ttest_ind, ttest_rel
from statsmodels.stats.multitest import multipletests
from glob import glob
from statannotations.Annotator import Annotator
plt.rcParams.update({'font.size': 14})
annot = pd.read_csv('../data/batches12_chemo_regimen_annotation.tsv', sep='\t', index_col=0)
annot['Primary Study ID'] = annot.loc[:, 'Primary Study ID'].astype(str)
pt_reg = annot.set_index('Primary Study ID').loc[:, 'chemo_regimen'].to_dict()
platinum_type_dict = annot.set_index('Primary Study ID').loc[:, 'platinum_type'].to_dict()
#tur_cys_palette = {'TUR': 'darkorange', 'Cystectomy': 'deepskyblue'}
#tur_cys_palette = {'Cystectomy': 'cornflowerblue', 'TUR': 'lightcoral'}
def dyn_comp(u, ax, lin_dens3, log_dens3, title, xlabel, test='t-test_paired', test_name = 't-test paired', size=3, format_simple = True):
sns.boxplot(y=u, x='sample_type', data = lin_dens3, ax=ax, order = ['TUR', 'Cystectomy'],
palette = tur_cys_palette, showfliers=False)
sns.stripplot(y=u, x='sample_type', data = lin_dens3, ax=ax, order = ['TUR', 'Cystectomy'],
color='black', size=size)
sns.lineplot(y=u, x='sample_type', data = lin_dens3, ax=ax, units = 'pt_number', estimator=None, linewidth=0.35, color = 'grey')
ax.set_yscale("log")
if format_simple:
ax.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.set_xlabel('')
ax.set_ylabel(xlabel)
ax.set_title(title)
annotator = Annotator(ax, pairs = [('TUR', 'Cystectomy')], data = log_dens3, x='sample_type', y=u, order=['TUR', 'Cystectomy'])
annotator.configure(test=test, text_format = 'simple', comparisons_correction='BH')
_, test_results = annotator.apply_test()._get_output()
pv = test_results[0].data.pvalue
annotator.set_custom_annotations(['{} p={:.2g}'.format(test_name, pv)])
annotator.annotate()
def dyn_comp_lin(u, ax, lin_dens3, title, xlabel, test='t-test_paired', test_name = 't-test paired', size=3):
sns.boxplot(y=u, x='sample_type', data = lin_dens3, ax=ax, order = ['TUR', 'Cystectomy'],
palette = tur_cys_palette, showfliers=False)
sns.stripplot(y=u, x='sample_type', data = lin_dens3, ax=ax, order = ['TUR', 'Cystectomy'],
color='black', size=size)
sns.lineplot(y=u, x='sample_type', data = lin_dens3, ax=ax, units = 'pt_number', estimator=None, linewidth=0.35, color = 'grey')
ax.set_xlabel('')
ax.set_ylabel(xlabel)
ax.set_title(title)
annotator = Annotator(ax, pairs = [('TUR', 'Cystectomy')], data = lin_dens3, x='sample_type', y=u, order=['TUR', 'Cystectomy'])
annotator.configure(test=test, text_format = 'simple', comparisons_correction='BH')
_, test_results = annotator.apply_test()._get_output()
pv = test_results[0].data.pvalue
annotator.set_custom_annotations(['{} p={:.2g}'.format(test_name, pv)])
annotator.annotate()
def dyn_comp_delta(u, ax, lin_dens3, title, xlabel, to_compare, order, test = 't-test_paired', size=3, ofs = 1):
sns.boxplot(y=u, x=to_compare, data = lin_dens3, ax=ax, order = order,
color = 'mediumseagreen', showfliers=False)
sns.swarmplot(y=u, x=to_compare, data = lin_dens3, ax=ax, order = order,
color='black', size=size)
sns.lineplot(y=u, x=to_compare, data = lin_dens3, ax=ax, units = 'pt_number', estimator=None, linewidth=0.7)
ax.set_xlabel('')
ax.set_ylabel(xlabel)
ax.set_title(title)
annotator = Annotator(ax, pairs = [order], data = lin_dens3, x=to_compare, y=u, order=order)
annotator.configure(test=test, text_format = 'simple', comparisons_correction='BH', line_offset = ofs, line_offset_to_group = ofs)
_, test_results = annotator.apply_test()._get_output()
pv = test_results[0].data.pvalue
annotator.set_custom_annotations(['{} p={:.2g}'.format(test, pv)])
annotator.annotate(line_offset = ofs, line_offset_to_group = ofs)
Density simple comparison
norm_log_dens = pd.read_csv('../data/combined_norm_log10_density.tsv', sep='\t', index_col=0)
norm_log_dens_tumor = pd.read_csv('../data/tumor_norm_log10_density.tsv', sep='\t', index_col=0)
norm_log_dens_stroma = pd.read_csv('../data/stroma_norm_log10_density.tsv', sep='\t', index_col=0)
raw_dens = pd.read_csv('../data/all_densities_combined_kde_mm2.tsv', sep='\t', index_col=0)
dens_to_comp = ['B_cells', 'CD4_Tcells', 'Macrophages', 'Tregs', 'CD8_Tcells', 'PanCK+_cells', 'Negative_cells']
def transform_dens(norm_log_dens, addition = 0):
norm_log_dens = norm_log_dens.loc[:, dens_to_comp] + addition
lin_dens2 = (10**norm_log_dens).assign(sample_type = norm_log_dens.index.str.split(' ').str[1]).assign(pt_number = norm_log_dens.index.str.split(' ').str[0])
lin_dens3 = lin_dens2[lin_dens2.pt_number.isin(lin_dens2.pt_number.value_counts()[lin_dens2.pt_number.value_counts() == 2].index)].sort_values(['pt_number', 'sample_type'], ascending=False)
lin_dens3 = lin_dens3.assign(chemo_regimen = lin_dens3.pt_number.map(pt_reg)).assign(platinum_type = lin_dens3.pt_number.map(platinum_type_dict))
log_dens2 = norm_log_dens.assign(sample_type = norm_log_dens.index.str.split(' ').str[1]).assign(pt_number = norm_log_dens.index.str.split(' ').str[0])
log_dens3 = log_dens2[log_dens2.pt_number.isin(log_dens2.pt_number.value_counts()[log_dens2.pt_number.value_counts() == 2].index)].sort_values(['pt_number', 'sample_type'], ascending=False)
log_dens3 = log_dens3.assign(chemo_regimen = log_dens3.pt_number.map(pt_reg)).assign(platinum_type = log_dens3.pt_number.map(platinum_type_dict))
return lin_dens3, log_dens3
lin_dens3, log_dens3 = transform_dens(norm_log_dens, addition = 2)
raw_lin_dens3, raw_log_dens3 = transform_dens(raw_dens)
lin_dens3_tumor, log_dens3_tumor = transform_dens(norm_log_dens_tumor, addition = 2)
lin_dens3_stroma, log_dens3_stroma = transform_dens(norm_log_dens_stroma, addition = 2)
PDL1 simple comparison
pdl1 = pd.read_csv('../data/Prepost NAC patients overview PD-L1.csv', sep=';', index_col=0)
pdl1_cys = pdl1.loc[~pdl1.index.isna(), ['Material.1', 'CPS (%).1', 'IC (%).1', 'TC (%).1']].dropna()
pdl1_tur = pdl1.loc[~pdl1.index.isna(), ['Material', 'CPS (%)', 'IC (%)', 'TC (%)']].dropna().replace('76,5', '76.5')
pdl1_tur.index = pdl1_tur.index.astype(int).astype(str) + ' TUR'
pdl1_cys.index = pdl1_cys.index.astype(int).astype(str) + ' Cystectomy'
pdl1_cys.columns = ['Material', 'CPS (%)', 'IC (%)', 'TC (%)']
pdl1_tur = pdl1_tur.drop('Material', axis=1).astype(float)+1
pdl1_cys = pdl1_cys.drop('Material', axis=1).astype(float)+1
pdl1_df = pd.concat([pdl1_tur.assign(sample_type = 'TUR'), pdl1_cys.assign(sample_type = 'Cystectomy')])
pdl1_df_log = pd.concat([np.log10(pdl1_tur).assign(sample_type = 'TUR'), np.log10(pdl1_cys).assign(sample_type = 'Cystectomy')])
pdl1_df2 = pdl1_df.assign(pt_number = pdl1_df.index.str.split(' ').str[0])
pdl1_df3 = pdl1_df2[pdl1_df2.pt_number.isin(pdl1_df2.pt_number.value_counts()[pdl1_df2.pt_number.value_counts() == 2].index)].sort_values(['pt_number', 'sample_type'], ascending=False)
pdl1_df3 = pdl1_df3.assign(platinum_type = pdl1_df3.pt_number.map(platinum_type_dict)).assign(chemo_regimen = pdl1_df3.pt_number.map(pt_reg))
pdl1_df2_log = pdl1_df_log.assign(pt_number = pdl1_df_log.index.str.split(' ').str[0])
pdl1_df3_log = pdl1_df2_log[pdl1_df2_log.pt_number.isin(pdl1_df2_log.pt_number.value_counts()[pdl1_df2_log.pt_number.value_counts() == 2].index)].sort_values(['pt_number', 'sample_type'], ascending=False)
pdl1_df3_log = pdl1_df3_log.assign(platinum_type = pdl1_df3_log.pt_number.map(platinum_type_dict)).assign(chemo_regimen = pdl1_df3_log.pt_number.map(pt_reg))
RNA signatures simple comparison
ssgsea = pd.read_csv('../data/ssgsea_scores_Fig2.tsv', sep='\t', index_col=0)
ssgsea = ssgsea.reset_index().assign(sample_type = ssgsea.reset_index().loc[:, 'index'].apply(lambda x: x.split(' ')[1])).assign(pt_number = ssgsea.reset_index().loc[:, 'index'].apply(lambda x: x.split(' ')[0])).set_index('index')
ps_ssgsea0 = ssgsea[ssgsea.pt_number.isin(ssgsea.pt_number.value_counts()[ssgsea.pt_number.value_counts() == 2].index)].sort_values(by = ['pt_number', 'sample_type'], ascending=False)
ps_ssgsea0 = ps_ssgsea0.assign(chemo_regimen = ps_ssgsea0.pt_number.map(pt_reg)).assign(platinum_type = ps_ssgsea0.pt_number.map(platinum_type_dict))
tur_cys_palette = {'TUR': 'lightcoral', 'Cystectomy': 'lightskyblue'}
def to_cisplatin(df):
df = df[df.platinum_type == 'Cisplatin']
return df
lin_dens3 = to_cisplatin(lin_dens3)
log_dens3 = to_cisplatin(log_dens3)
lin_dens3_tumor = to_cisplatin(lin_dens3_tumor)
log_dens3_tumor = to_cisplatin(log_dens3_tumor)
lin_dens3_stroma = to_cisplatin(lin_dens3_stroma)
log_dens3_stroma = to_cisplatin(log_dens3_stroma)
pdl1_df3 = to_cisplatin(pdl1_df3)
pdl1_df3_log = to_cisplatin(pdl1_df3_log)
ps_ssgsea0 = to_cisplatin(ps_ssgsea0)
plt.close()
fig, axs = plt.subplots(3, 3, figsize = (15, 15))
af = axs.flat
dyn_comp('CD8_Tcells', af[2], lin_dens3, log_dens3, '', 'CD8 T cell percentage in tumor + stroma, %')
## TUR vs. Cystectomy: t-test paired p=0.0017
dyn_comp('CD8_Tcells', af[0], lin_dens3_tumor, log_dens3_tumor, '', 'CD8 T cell percentage in tumor, %')
## TUR vs. Cystectomy: t-test paired p=0.015
dyn_comp('CD8_Tcells', af[1], lin_dens3_stroma, log_dens3_stroma, '', 'CD8 T cell percentage in stroma, %')
## TUR vs. Cystectomy: t-test paired p=0.0091
af[0].set_ylim((0.01), 200)
## (0.01, 200)
af[1].set_ylim((0.01), 200)
## (0.01, 200)
af[2].set_ylim((0.01), 200)
## (0.01, 200)
for i in range(3):
af[i].set_yticks([0.01,0.1,1, 10, 100], ['0.01','0.1','1', '10', '100'])
for i,u in enumerate(['IC (%)', 'TC (%)', 'CPS (%)']):
dyn_comp(u, af[i+3], pdl1_df3, pdl1_df3_log, '', '{} PDL1 score'.format(u), test='Wilcoxon', test_name='Wilcoxon')
af[i+3].set_ylim((0.5, 300))
af[i+3].set_yticks([1,10,100], ['1','10', '100'])
ps_ssgsea = ps_ssgsea0.copy()
dyn_comp_lin('TGFB_Mariathasan', af[6], ps_ssgsea, '', 'fibroblast TGFb ssGSEA score')
## TUR vs. Cystectomy: t-test paired p=6.5e-06
dyn_comp_lin('IFNG_signature', af[7], ps_ssgsea, '', 'INFg ssGSEA score')
## TUR vs. Cystectomy: t-test paired p=0.46
dyn_comp_lin('CD8_T_EFFECTOR', af[8], ps_ssgsea, '', 'effector CD8 T cell ssGSEA score')
## TUR vs. Cystectomy: t-test paired p=0.26
for i in range(9):
af[i].set_xticks(['TUR', 'Cystectomy'], ['TUR-BT', 'Cystectomy'])
plt.subplots_adjust(hspace=0.2, wspace=0.4)
plt.show()

plt.close()
fig, axs = plt.subplots(2, 6, figsize = (22.7, 11))
af = axs.flat
cell_names = {'CD8_Tcells': 'CD8 T cell', 'Macrophages': 'Macrophage', 'CD4_Tcells': 'T helper', 'Tregs': 'Treg', 'B_cells': 'B cell', 'PanCK+_cells': 'Cancer cell', 'Negative_cells': 'Negative cell'}
for i, u in enumerate(['CD8_Tcells', 'Macrophages', 'CD4_Tcells', 'Tregs', 'B_cells', 'PanCK+_cells']):
dyn_comp(u, af[i], lin_dens3, log_dens3, '', '{} percentage, %'.format(cell_names[u]), test='Wilcoxon', test_name='Wilcoxon')
af[i].set_xticks(['TUR', 'Cystectomy'], ['TUR-BT', 'Cystectomy'])
for i, u in enumerate(['CD8_Tcells', 'Macrophages', 'CD4_Tcells', 'Tregs', 'B_cells', 'PanCK+_cells']):
dyn_comp(u, af[i+6], raw_lin_dens3, raw_log_dens3, '', '{} density, cells/mm2'.format(cell_names[u]), test='Wilcoxon', test_name='Wilcoxon', format_simple = False)
af[i+6].set_xticks(['TUR', 'Cystectomy'], ['TUR-BT', 'Cystectomy'])
plt.subplots_adjust(hspace=0.2, wspace=0.47)
plt.show()

Mixed ANOVA graphs
cys_den = log_dens3[log_dens3.sample_type == 'Cystectomy'].set_index('pt_number')
tur_den = log_dens3[log_dens3.sample_type == 'TUR'].set_index('pt_number')
common_in = cys_den.index.intersection(tur_den.index)
delta_den = cys_den.loc[common_in].drop(['sample_type', 'chemo_regimen', 'platinum_type'], axis=1) - tur_den.loc[common_in].drop(['sample_type', 'chemo_regimen', 'platinum_type'], axis=1)
delta_den = delta_den.assign(pt_number = delta_den.index)
delta_den = delta_den.assign(chemo_regimen = delta_den.pt_number.map(pt_reg)).assign(platinum_type = delta_den.pt_number.map(platinum_type_dict))
cys_ps= ps_ssgsea0[ps_ssgsea0.sample_type == 'Cystectomy'].set_index('pt_number')
tur_ps= ps_ssgsea0[ps_ssgsea0.sample_type == 'TUR'].set_index('pt_number')
common_in = cys_ps.index.intersection(tur_ps.index)
delta_ps = cys_ps.loc[common_in].drop(['sample_type', 'chemo_regimen', 'platinum_type'], axis=1) - tur_ps.loc[common_in].drop(['sample_type', 'chemo_regimen', 'platinum_type'], axis=1)
delta_ps = delta_ps.assign(pt_number = delta_ps.index)
delta_ps = delta_ps.assign(chemo_regimen = delta_ps.pt_number.map(pt_reg)).assign(platinum_type = delta_ps.pt_number.map(platinum_type_dict))
all_med = pd.read_csv('../data/all_median_distances_wo_offset.tsv', sep='\t', index_col=0)
all_med_log = np.log10(all_med.drop(['sample_type', 'pt_number'], axis=1)).assign(sample_type = all_med.index.str.split(' ').str[1]).assign(pt_number = all_med.index.str.split(' ').str[0])
all_med_lin = all_med.drop(['sample_type'], axis=1).assign(sample_type = all_med.index.str.split(' ').str[1]).assign(pt_number = all_med.index.str.split(' ').str[0])
all_med_log = all_med_log[all_med_log.pt_number.isin(all_med_log.pt_number.value_counts()[all_med_log.pt_number.value_counts() == 2].index)].sort_values(['pt_number', 'sample_type'], ascending=False).assign(chemo_regimen = all_med_log.pt_number.map(pt_reg)).assign(platinum_type =all_med_log.pt_number.map(platinum_type_dict))
all_med_lin = all_med_lin[all_med_lin.pt_number.isin(all_med_lin.pt_number.value_counts()[all_med_lin.pt_number.value_counts() == 2].index)].sort_values(['pt_number', 'sample_type'], ascending=False).assign(chemo_regimen = all_med_lin.pt_number.map(pt_reg)).assign(platinum_type = all_med_lin.pt_number.map(platinum_type_dict))
all_med_lin = to_cisplatin(all_med_lin)
all_med_log = to_cisplatin(all_med_log)
plt.close()
fig, axs = plt.subplots(1, 2, figsize = (8, 5))
af = axs.flat
dyn_comp('CD8_Tcell_to_tumor_median', af[0], all_med_lin, all_med_log, '', 'CD8 T cell to 1-NN cancer cell\nmedian distance, um', test='Wilcoxon', test_name='Wilcoxon')
## TUR vs. Cystectomy: Wilcoxon p=2.2e-05
dyn_comp('Macrophage_to_tumor_median', af[1], all_med_lin, all_med_log, '', 'Macrophage to 1-NN cancer cell\nmedian distance, um', test='Wilcoxon', test_name='Wilcoxon')
## TUR vs. Cystectomy: Wilcoxon p=3.1e-05
af[0].set_yticks([10, 50, 100, 500], ['10','50','100','500'])
af[1].set_yticks([10, 50, 100], ['10','50','100'])
#af[0].set_ylim((0.0001), 2)
#af[1].set_ylim((0.0001), 2)
for i in range(2):
af[i].set_xticks(['TUR', 'Cystectomy'], ['TUR-BT', 'Cystectomy'])
#_ = fig.set_tight_layout(True)
plt.subplots_adjust(hspace=0.2, wspace=0.5)
plt.show()

tur_dist2 = (all_med_log[all_med_log.sample_type == 'TUR']).set_index('pt_number')
cys_dist2 = (all_med_log[all_med_log.sample_type == 'Cystectomy']).set_index('pt_number')
common_index0 = tur_dist2.index.intersection(cys_dist2.index)
delta_dist0 = cys_dist2.loc[common_index0, ['CD8_Tcell_to_tumor_median', 'Macrophage_to_tumor_median']] - tur_dist2.loc[common_index0, ['CD8_Tcell_to_tumor_median', 'Macrophage_to_tumor_median']]
delta_dist0 = delta_dist0.assign(pt_number = delta_dist0.index.str.split(' ').str[0])
delta_dist0 = delta_dist0.assign(chemo_regimen = delta_dist0.pt_number.map(pt_reg)).assign(platinum_type = delta_dist0.pt_number.map(platinum_type_dict)).dropna(subset = ['chemo_regimen'])
pdl1_df3_log = pdl1_df3_log.rename(columns = {'IC (%)': 'IC', 'CPS (%)': 'CPS', 'TC (%)': 'TC'}).dropna()
pdl1_df3 = pdl1_df3.rename(columns = {'IC (%)': 'IC', 'CPS (%)': 'CPS', 'TC (%)': 'TC'}).dropna()
cys_pdl = pdl1_df3_log[pdl1_df3_log.sample_type == 'Cystectomy'].set_index('pt_number')
tur_pdl = pdl1_df3_log[pdl1_df3_log.sample_type == 'TUR'].set_index('pt_number')
common_in = cys_pdl.index.intersection(tur_pdl.index)
delta_pdl = cys_pdl.loc[common_in].drop(['sample_type', 'chemo_regimen', 'platinum_type'], axis=1) - tur_pdl.loc[common_in].drop(['sample_type', 'chemo_regimen', 'platinum_type'], axis=1)
delta_pdl = delta_pdl.assign(pt_number = delta_pdl.index)
delta_pdl = delta_pdl.assign(chemo_regimen = delta_pdl.pt_number.map(pt_reg)).assign(platinum_type = delta_pdl.pt_number.map(platinum_type_dict))
def dyn_comp_anova(u, ax, lin_dens3, log_dens3, title, xlabel, loc_leg, test='t-test_paired', test_name='t-test paired'):
sns.boxplot(y=u, x = 'chemo_regimen', hue='sample_type', data = lin_dens3, ax=ax, hue_order = ['TUR', 'Cystectomy'], order = ['MVAC', 'gemcitabine_platinum'],
palette = tur_cys_palette, showfliers=False)
sns.stripplot(y=u, x = 'chemo_regimen', hue='sample_type', data = lin_dens3, ax=ax, hue_order = ['TUR', 'Cystectomy'], order = ['MVAC', 'gemcitabine_platinum'],
color='black', dodge=True, size=3)
#sns.lineplot(y=u, x='chemo_regimen', hue='sample_type', data = lin_dens3, ax=ax, units = 'pt_number', estimator=None, linewidth=0.5, color = 'grey')
ax.set_yscale("log")
ax.set_xlabel('')
ax.set_ylabel(xlabel)
ax.set_title(title)
annotator = Annotator(ax, pairs = [(('MVAC', 'TUR'), ('MVAC', 'Cystectomy')), (('gemcitabine_platinum', 'TUR'), ('gemcitabine_platinum', 'Cystectomy'))], data = log_dens3, x='chemo_regimen', hue = 'sample_type', y=u, hue_order = ['TUR', 'Cystectomy'], order = ['MVAC', 'gemcitabine_platinum'])
annotator.configure(test=test, text_format = 'simple', comparisons_correction='BH')
_, test_results = annotator.apply_test()._get_output()
#pv = test_results[0].data.pvalue
annotator.set_custom_annotations(['{} p={:.2g}'.format(test_name, test_results[0].data.pvalue), '{} p={:.2g}'.format(test_name, test_results[1].data.pvalue)])
annotator.annotate()
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[0:2], ['TUR-BT', 'Cystectomy'], loc=loc_leg)
def dyn_comp_lin_anova(u, ax, lin_dens3, title, xlabel, loc_leg, test='t-test_paired', test_name='t-test paired'):
sns.boxplot(y=u, x = 'chemo_regimen', hue='sample_type', data = lin_dens3, ax=ax, hue_order = ['TUR', 'Cystectomy'], order = ['MVAC', 'gemcitabine_platinum'],
palette = tur_cys_palette, showfliers=False)
sns.stripplot(y=u, x = 'chemo_regimen', hue='sample_type', data = lin_dens3, ax=ax, hue_order = ['TUR', 'Cystectomy'], order = ['MVAC', 'gemcitabine_platinum'],
color='black', dodge=True, size=3)
#sns.lineplot(y=u, hue='chemo_regimen', x='sample_type', data = lin_dens3, ax=ax, units = 'pt_number', estimator=None, linewidth=0.5, color = 'grey')
ax.set_xlabel('')
ax.set_ylabel(xlabel)
ax.set_title(title)
annotator = Annotator(ax, pairs = [(('MVAC', 'TUR'), ('MVAC', 'Cystectomy')), (('gemcitabine_platinum', 'TUR'), ('gemcitabine_platinum', 'Cystectomy'))], data = lin_dens3, x='chemo_regimen', hue = 'sample_type', y=u, hue_order = ['TUR', 'Cystectomy'], order = ['MVAC', 'gemcitabine_platinum'])
annotator.configure(test=test, text_format = 'simple', comparisons_correction='BH')
_, test_results = annotator.apply_test()._get_output()
#pv = test_results[0].data.pvalue
annotator.set_custom_annotations(['{} p={:.2g}'.format(test_name, test_results[0].data.pvalue), '{} paired p={:.2g}'.format(test_name, test_results[1].data.pvalue)])
annotator.annotate()
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[0:2], ['TUR-BT', 'Cystectomy'], loc=loc_leg)
plt.rcParams.update({'font.size': 15})
testt = 'Wilcoxon'
testt_name = 'Wilcoxon'
plt.close()
fig, axs = plt.subplots(2, 2, figsize = (14.5, 12.5), width_ratios = [10, 5])
af = axs.flat
u = 'Macrophages'
dyn_comp_anova(u, af[0], lin_dens3, log_dens3, '', 'Macrophage cell percentage', loc_leg=(0.3, 0.03), test='Wilcoxon', test_name = 'Wilcoxon')
## gemcitabine_platinum_TUR vs. gemcitabine_platinum_Cystectomy: Wilcoxon p=0.84
## MVAC_TUR vs. MVAC_Cystectomy: Wilcoxon p=0.0011
##
## /home/m.chelushkin/.local/lib/python3.8/site-packages/seaborn/categorical.py:166: FutureWarning: Setting a gradient palette using color= is deprecated and will be removed in version 0.13. Set `palette='dark:black'` for same effect.
## warnings.warn(msg, FutureWarning)
dyn_comp_delta(u, af[1], delta_den, '', 'Δ log10(macrophage percentage)\nupon treatment', to_compare = 'chemo_regimen', order = ('MVAC', 'gemcitabine_platinum'), test = 'Mann-Whitney', ofs = -0.08, size=5)
## MVAC vs. gemcitabine_platinum: Mann-Whitney p=0.015
af[0].set_xticklabels(['MVAC', 'gemcitabine +\nplatinum'])
af[1].set_xticklabels(['MVAC', 'gemcitabine +\nplatinum'])
af[1].set_xlim((-0.7,1.7))
## (-0.7, 1.7)
af[1].set_ylim((-1.4,2.8))
## (-1.4, 2.8)
af[0].set_yticks([0.1,1, 10, 100], ['0.1','1', '10', '100'])
u = 'TGFB_Mariathasan'
dyn_comp_lin_anova(u, af[2], ps_ssgsea0, '', 'Fibroblast TGFb ssGSEA score', loc_leg='lower right', test='Wilcoxon', test_name = 'Wilcoxon')
## gemcitabine_platinum_TUR vs. gemcitabine_platinum_Cystectomy: Wilcoxon p=8.5e-05
## MVAC_TUR vs. MVAC_Cystectomy: Wilcoxon paired p=0.11
##
## /home/m.chelushkin/.local/lib/python3.8/site-packages/seaborn/categorical.py:166: FutureWarning: Setting a gradient palette using color= is deprecated and will be removed in version 0.13. Set `palette='dark:black'` for same effect.
## warnings.warn(msg, FutureWarning)
dyn_comp_delta(u, af[3], delta_ps, '', 'Δ fibroblast TGFb upon treatment', to_compare = 'chemo_regimen', order = ('MVAC', 'gemcitabine_platinum'), test = 'Mann-Whitney', ofs=0.1, size=5)
## MVAC vs. gemcitabine_platinum: Mann-Whitney p=0.038
af[2].set_xticklabels(['MVAC', 'gemcitabine +\nplatinum'])
af[3].set_xticklabels(['MVAC', 'gemcitabine +\nplatinum'])
af[3].set_xlim((-0.7,1.7))
## (-0.7, 1.7)
#_ = fig.set_tight_layout(True)
plt.subplots_adjust(hspace=0.3, wspace=0.25)
plt.show()

Mixed ANOVA models
densities
lin_dens3_cr = lin_dens3[lin_dens3.chemo_regimen.isin(['MVAC', 'gemcitabine_platinum'])]
#lin_dens3_ptt = lin_dens3[lin_dens3.platinum_type.isin(['Cisplatin', 'Carboplatin'])]
log_dens3_cr = log_dens3[log_dens3.chemo_regimen.isin(['MVAC', 'gemcitabine_platinum'])]
log_dens3_ptt = log_dens3[log_dens3.platinum_type.isin(['Cisplatin', 'Carboplatin'])]
CD8 T cells
py$log_dens3_cr %>%
group_by(sample_type, chemo_regimen) %>%
shapiro_test(CD8_Tcells)
## # A tibble: 4 × 5
## sample_type chemo_regimen variable statistic p
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Cystectomy MVAC CD8_Tcells 0.985 0.936
## 2 Cystectomy gemcitabine_platinum CD8_Tcells 0.979 0.680
## 3 TUR MVAC CD8_Tcells 0.969 0.499
## 4 TUR gemcitabine_platinum CD8_Tcells 0.981 0.772
py$log_dens3_cr %>%
group_by(sample_type) %>%
levene_test(CD8_Tcells ~ chemo_regimen)
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by warning in `leveneTest.default()`:
## ! group coerced to factor.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## # A tibble: 2 × 5
## sample_type df1 df2 statistic p
## <chr> <int> <int> <dbl> <dbl>
## 1 Cystectomy 1 66 0.591 0.445
## 2 TUR 1 66 1.20 0.277
box_m(py$log_dens3_cr[, "CD8_Tcells", drop = FALSE], py$log_dens3_cr$chemo_regimen)
## # A tibble: 1 × 4
## statistic p.value parameter method
## <dbl> <dbl> <dbl> <chr>
## 1 0.760 0.383 1 Box's M-test for Homogeneity of Covariance Matric…
res.aov <- anova_test(
data = py$log_dens3_cr, dv = CD8_Tcells, wid = pt_number,
between = chemo_regimen, within = sample_type
)
get_anova_table(res.aov)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 chemo_regimen 1 66 0.410 0.524 0.004
## 2 sample_type 1 66 11.124 0.001 * 0.048
## 3 chemo_regimen:sample_type 1 66 0.973 0.328 0.004
bwtrim(CD8_Tcells ~ chemo_regimen*sample_type, id = pt_number, data = py$log_dens3_cr)
## Call:
## bwtrim(formula = CD8_Tcells ~ chemo_regimen * sample_type, id = pt_number,
## data = py$log_dens3_cr)
##
## value df1 df2 p.value
## chemo_regimen 0.0660 1 43.9789 0.7984
## sample_type 6.0601 1 41.1155 0.0181
## chemo_regimen:sample_type 0.4322 1 41.1155 0.5146
Macrophages
py$log_dens3_cr %>%
group_by(sample_type, chemo_regimen) %>%
shapiro_test(Macrophages)
## # A tibble: 4 × 5
## sample_type chemo_regimen variable statistic p
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Cystectomy MVAC Macrophages 0.969 0.499
## 2 Cystectomy gemcitabine_platinum Macrophages 0.850 0.000156
## 3 TUR MVAC Macrophages 0.881 0.00254
## 4 TUR gemcitabine_platinum Macrophages 0.935 0.0329
py$log_dens3_cr %>%
group_by(sample_type) %>%
levene_test(Macrophages ~ chemo_regimen)
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by warning in `leveneTest.default()`:
## ! group coerced to factor.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## # A tibble: 2 × 5
## sample_type df1 df2 statistic p
## <chr> <int> <int> <dbl> <dbl>
## 1 Cystectomy 1 66 1.25 0.267
## 2 TUR 1 66 0.118 0.732
box_m(py$log_dens3_cr[, "Macrophages", drop = FALSE], py$log_dens3_cr$chemo_regimen)
## # A tibble: 1 × 4
## statistic p.value parameter method
## <dbl> <dbl> <dbl> <chr>
## 1 0.0576 0.810 1 Box's M-test for Homogeneity of Covariance Matric…
bwtrim(Macrophages ~ chemo_regimen*sample_type, id = pt_number, data = py$log_dens3_cr)
## Call:
## bwtrim(formula = Macrophages ~ chemo_regimen * sample_type, id = pt_number,
## data = py$log_dens3_cr)
##
## value df1 df2 p.value
## chemo_regimen 0.9185 1 40.8416 0.3435
## sample_type 7.7773 1 43.8798 0.0078
## chemo_regimen:sample_type 3.3520 1 43.8798 0.0739
res.aov <- anova_test(
data = py$log_dens3_cr, dv = Macrophages, wid = pt_number,
between = chemo_regimen, within = sample_type
)
get_anova_table(res.aov)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 chemo_regimen 1 66 1.277 0.263 0.013
## 2 sample_type 1 66 9.424 0.003 * 0.041
## 3 chemo_regimen:sample_type 1 66 6.955 0.010 * 0.031
f1.ld.f1(y = py$log_dens3_cr$Macrophages, time = py$log_dens3_cr$sample_type, group = py$log_dens3_cr$chemo_regimen, subject = py$log_dens3_cr$pt_number, time.order = c('TUR', 'Cystectomy'), group.order = c('MVAC', 'gemcitabine_platinum'), description=F)$ANOVA.test
## F1 LD F1 Model
## -----------------------
## Check that the order of the time and group levels are correct.
## Time level: TUR Cystectomy
## Group level: MVAC gemcitabine_platinum
## If the order is not correct, specify the correct order in time.order or group.order.

## Statistic df p-value
## Group 1.114216 1 0.291167239
## Time 9.292243 1 0.002301263
## Group:Time 7.422676 1 0.006440696
TGFb signature
ps_ssgsea_cr = ps_ssgsea0[ps_ssgsea0.chemo_regimen.isin(['MVAC', 'gemcitabine_platinum'])]#.drop(['15 TUR', '15 Cystectomy', '7 TUR', '7 Cystectomy', '31 TUR', '31 Cystectomy'])
py$ps_ssgsea_cr %>%
group_by(sample_type, chemo_regimen) %>%
shapiro_test(TGFB_Mariathasan)
## # A tibble: 4 × 5
## sample_type chemo_regimen variable statistic p
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Cystectomy MVAC TGFB_Mariathasan 0.936 0.164
## 2 Cystectomy gemcitabine_platinum TGFB_Mariathasan 0.968 0.391
## 3 TUR MVAC TGFB_Mariathasan 0.926 0.102
## 4 TUR gemcitabine_platinum TGFB_Mariathasan 0.984 0.873
py$ps_ssgsea_cr %>%
group_by(sample_type) %>%
levene_test(TGFB_Mariathasan ~ chemo_regimen)
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by warning in `leveneTest.default()`:
## ! group coerced to factor.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## # A tibble: 2 × 5
## sample_type df1 df2 statistic p
## <chr> <int> <int> <dbl> <dbl>
## 1 Cystectomy 1 55 0.458 0.501
## 2 TUR 1 55 6.72 0.0122
box_m(py$ps_ssgsea_cr[, "TGFB_Mariathasan", drop = FALSE], py$ps_ssgsea_cr$chemo_regimen)
## # A tibble: 1 × 4
## statistic p.value parameter method
## <dbl> <dbl> <dbl> <chr>
## 1 3.38 0.0659 1 Box's M-test for Homogeneity of Covariance Matric…
res.aov <- anova_test(
data = py$ps_ssgsea_cr, dv = TGFB_Mariathasan, wid = pt_number,
between = chemo_regimen, within = sample_type
)
get_anova_table(res.aov)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 chemo_regimen 1 55 0.945 3.35e-01 0.010
## 2 sample_type 1 55 20.482 3.26e-05 * 0.129
## 3 chemo_regimen:sample_type 1 55 3.644 6.20e-02 0.026
tgf_df = bwtrim(TGFB_Mariathasan ~ chemo_regimen*sample_type, id = pt_number, data = py$ps_ssgsea_cr)
f1.ld.f1(y = py$ps_ssgsea_cr$TGFB_Mariathasan, time = py$ps_ssgsea_cr$sample_type, group = py$ps_ssgsea_cr$chemo_regimen, subject = py$ps_ssgsea_cr$pt_number, time.order = c('TUR', 'Cystectomy'), group.order = c('MVAC', 'gemcitabine_platinum'), description=F)$ANOVA.test
## F1 LD F1 Model
## -----------------------
## Check that the order of the time and group levels are correct.
## Time level: TUR Cystectomy
## Group level: MVAC gemcitabine_platinum
## If the order is not correct, specify the correct order in time.order or group.order.

## Statistic df p-value
## Group 0.3681926 1 5.439911e-01
## Time 21.1397173 1 4.269862e-06
## Group:Time 2.2215638 1 1.360951e-01
PDL1 score
pdl_cr = pdl1_df3_log[pdl1_df3_log.chemo_regimen.isin(['MVAC', 'gemcitabine_platinum'])]
py$pdl_cr %>%
group_by(sample_type, chemo_regimen) %>%
shapiro_test(IC)
## # A tibble: 4 × 5
## sample_type chemo_regimen variable statistic p
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Cystectomy MVAC IC 0.797 0.0000351
## 2 Cystectomy gemcitabine_platinum IC 0.843 0.000366
## 3 TUR MVAC IC 0.867 0.000994
## 4 TUR gemcitabine_platinum IC 0.890 0.00400
py$pdl_cr %>%
group_by(sample_type) %>%
levene_test(IC ~ chemo_regimen)
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by warning in `leveneTest.default()`:
## ! group coerced to factor.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## # A tibble: 2 × 5
## sample_type df1 df2 statistic p
## <chr> <int> <int> <dbl> <dbl>
## 1 Cystectomy 1 61 2.47 0.122
## 2 TUR 1 61 0.0608 0.806
box_m(py$pdl_cr[, "IC", drop = FALSE], py$pdl_cr$chemo_regimen)
## # A tibble: 1 × 4
## statistic p.value parameter method
## <dbl> <dbl> <dbl> <chr>
## 1 2.10 0.147 1 Box's M-test for Homogeneity of Covariance Matric…
#bwtrim(IC ~ chemo_regimen*sample_type, id = pt_number, data = py$pdl_cr)
f1.ld.f1(y = py$pdl_cr$IC, time = py$pdl_cr$sample_type, group = py$pdl_cr$chemo_regimen, subject = py$pdl_cr$pt_number, time.order = c('TUR', 'Cystectomy'), group.order = c('MVAC', 'gemcitabine_platinum'), description=F)$ANOVA.test
## F1 LD F1 Model
## -----------------------
## Check that the order of the time and group levels are correct.
## Time level: TUR Cystectomy
## Group level: MVAC gemcitabine_platinum
## If the order is not correct, specify the correct order in time.order or group.order.

## Statistic df p-value
## Group 4.987890 1 0.0255253172
## Time 13.172406 1 0.0002841017
## Group:Time 3.543588 1 0.0597760199
res.aov <- anova_test(
data = py$pdl_cr, dv = IC, wid = pt_number,
between = chemo_regimen, within = sample_type
)
get_anova_table(res.aov)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 chemo_regimen 1 61 6.933 0.011000 * 0.060
## 2 sample_type 1 61 14.429 0.000338 * 0.093
## 3 chemo_regimen:sample_type 1 61 2.730 0.104000 0.019
distances
log_dist3_cr = all_med_log[all_med_log.chemo_regimen.isin(['MVAC', 'gemcitabine_platinum'])]
py$log_dist3_cr %>%
group_by(sample_type, chemo_regimen) %>%
shapiro_test(CD8_Tcell_to_tumor_median)
## # A tibble: 4 × 5
## sample_type chemo_regimen variable statistic p
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Cystectomy MVAC CD8_Tcell_to_tumor_median 0.946 0.120
## 2 Cystectomy gemcitabine_platinum CD8_Tcell_to_tumor_median 0.957 0.159
## 3 TUR MVAC CD8_Tcell_to_tumor_median 0.779 0.0000217
## 4 TUR gemcitabine_platinum CD8_Tcell_to_tumor_median 0.959 0.189
py$log_dist3_cr %>%
group_by(sample_type) %>%
levene_test(CD8_Tcell_to_tumor_median ~ chemo_regimen)
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by warning in `leveneTest.default()`:
## ! group coerced to factor.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## # A tibble: 2 × 5
## sample_type df1 df2 statistic p
## <chr> <int> <int> <dbl> <dbl>
## 1 Cystectomy 1 66 0.495 0.484
## 2 TUR 1 66 1.76 0.190
box_m(py$log_dist3_cr[, "CD8_Tcell_to_tumor_median", drop = FALSE], py$log_dist3_cr$chemo_regimen)
## # A tibble: 1 × 4
## statistic p.value parameter method
## <dbl> <dbl> <dbl> <chr>
## 1 3.22 0.0728 1 Box's M-test for Homogeneity of Covariance Matric…
f1.ld.f1(y = py$log_dist3_cr$CD8_Tcell_to_tumor_median, time = py$log_dist3_cr$sample_type, group = py$log_dist3_cr$chemo_regimen, subject = py$log_dist3_cr$pt_number, time.order = c('TUR', 'Cystectomy'), group.order = c('MVAC', 'gemcitabine_platinum'), description=F)$ANOVA.test
## F1 LD F1 Model
## -----------------------
## Check that the order of the time and group levels are correct.
## Time level: TUR Cystectomy
## Group level: MVAC gemcitabine_platinum
## If the order is not correct, specify the correct order in time.order or group.order.

## Statistic df p-value
## Group 0.2179797 1 6.405836e-01
## Time 20.4518025 1 6.115193e-06
## Group:Time 0.2704191 1 6.030508e-01
res.aov <- anova_test(
data = py$log_dist3_cr, dv = CD8_Tcell_to_tumor_median, wid = pt_number,
between = chemo_regimen, within = sample_type
)
get_anova_table(res.aov)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 chemo_regimen 1 66 0.000992 0.975000 1.05e-05
## 2 sample_type 1 66 13.070000 0.000581 * 5.60e-02
## 3 chemo_regimen:sample_type 1 66 0.060000 0.807000 2.72e-04
py$log_dist3_cr %>%
group_by(sample_type, chemo_regimen) %>%
shapiro_test(Macrophage_to_tumor_median)
## # A tibble: 4 × 5
## sample_type chemo_regimen variable statistic p
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Cystectomy MVAC Macrophage_to_tumor_median 0.846 4.10e-4
## 2 Cystectomy gemcitabine_platinum Macrophage_to_tumor_median 0.927 1.81e-2
## 3 TUR MVAC Macrophage_to_tumor_median 0.804 6.12e-5
## 4 TUR gemcitabine_platinum Macrophage_to_tumor_median 0.883 1.03e-3
py$log_dist3_cr %>%
group_by(sample_type) %>%
levene_test(Macrophage_to_tumor_median ~ chemo_regimen)
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `data = map(.data$data, .f, ...)`.
## Caused by warning in `leveneTest.default()`:
## ! group coerced to factor.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## # A tibble: 2 × 5
## sample_type df1 df2 statistic p
## <chr> <int> <int> <dbl> <dbl>
## 1 Cystectomy 1 66 0.0739 0.787
## 2 TUR 1 66 2.15 0.147
box_m(py$log_dist3_cr[, "Macrophage_to_tumor_median", drop = FALSE], py$log_dist3_cr$chemo_regimen)
## # A tibble: 1 × 4
## statistic p.value parameter method
## <dbl> <dbl> <dbl> <chr>
## 1 4.41 0.0357 1 Box's M-test for Homogeneity of Covariance Matric…
f1.ld.f1(y = py$log_dist3_cr$Macrophage_to_tumor_median, time = py$log_dist3_cr$sample_type, group = py$log_dist3_cr$chemo_regimen, subject = py$log_dist3_cr$pt_number, time.order = c('TUR', 'Cystectomy'), group.order = c('MVAC', 'gemcitabine_platinum'), description=F)$ANOVA.test
## F1 LD F1 Model
## -----------------------
## Check that the order of the time and group levels are correct.
## Time level: TUR Cystectomy
## Group level: MVAC gemcitabine_platinum
## If the order is not correct, specify the correct order in time.order or group.order.

## Statistic df p-value
## Group 0.74679531 1 3.874928e-01
## Time 17.15151849 1 3.451334e-05
## Group:Time 0.03287889 1 8.561122e-01
res.aov <- anova_test(
data = py$log_dist3_cr, dv = Macrophage_to_tumor_median, wid = pt_number,
between = chemo_regimen, within = sample_type
)
get_anova_table(res.aov)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 chemo_regimen 1 66 1.044 0.31100 0.009000
## 2 sample_type 1 66 14.289 0.00034 * 0.080000
## 3 chemo_regimen:sample_type 1 66 0.109 0.74300 0.000662